import re
import time
import csv
import codecs

import requests
from bs4 import BeautifulSoup
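# Scrape high-anonymity proxies from xiladaili.com, verify each one by
# requesting ip.cn through it, and append the verified proxies to a CSV.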



# Output CSV: proxy results are appended with GBK encoding
f = codecs.open('ip信息.csv', 'a', 'gbk')
w = csv.writer(f)

def xici():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    }
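    # Crawl pages of the high-anonymity list; the trailing number in the URL is the page index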
    for i in range(100,102):
        time.sleep(2)
        print('Page ' + str(i))
        url = 'http://www.xiladaili.com/gaoni/%s'%str(i)
        print(url)
        response = requests.get(url,headers=headers)
        soup = BeautifulSoup(response.text,'html.parser')
        # print(soup)
        all_trs = soup.select("tbody > tr")
        for tr in all_trs:
            all_tds = tr.find_all('td')
            ip = all_tds[0].get_text()      # first column: the proxy address
            types = all_tds[1].get_text()   # second column: protocol text (may contain Chinese and list two protocols)
            http = types.split(',')[0] if ',' in types else ''.join(re.findall(r'[A-Za-z]', types))

            # requests matches proxies against the lowercase URL scheme, so normalise the key
            http = http.lower()
            proxies = {http: f'{http}://{ip}'}
            print(proxies)
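            # Verify the proxy by requesting ip.cn through it; a 3-second timeout drops dead proxies quickly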
            try:
                r = requests.get('http://www.ip.cn/',headers=headers,proxies=proxies,timeout=3)
                html = r.text
            except requests.RequestException:
                print('fail-%s' %ip)
            else:
                print('success-%s' % ip)
                soup1 = BeautifulSoup(html, 'lxml')
                # ip.cn is expected to show the detected IP inside an element with class "well"
                div = soup1.find(class_='well')

                if div:
                    print(div.text)
                w.writerow([ip, types])
                print('IP written successfully')

if __name__ == '__main__':
    xici()





#  Teacher's code

# import codecs
# import re
#
# from bs4 import BeautifulSoup
# import csv  # standard library
#
# f = codecs.open('ip信息3.csv', 'a', 'gbk')
# w = csv.writer(f)
# w.writerow(["IP", '协议'])
#
# '''
# Find the URL
#     Inspect the request headers and parameters; check for garbled text
#     The data is static: the backend returns HTML that already contains it
# Test it
# Pagination
#     http://www.xiladaili.com/gaoni/1/   the last number is the page index
#     for i in range(5):
# '''
# headers = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
#     }
# import requests
# for i in range(100,102):
#         print('Page ' + str(i))
#         url = 'http://www.xiladaili.com/gaoni/%s'%str(i)
#         res = requests.get(url)
#         soup = BeautifulSoup(res.text, 'html.parser')
#         all_trs = soup.select("tbody > tr")
#         for tr in all_trs:
#             all_tds = tr.find_all('td')
#             ip = all_tds[0].get_text()  # straightforward extraction
#             types = all_tds[1].get_text()
#             # strip the Chinese text and handle dual-protocol entries
#             http = types.split(',')[0] if ',' in types else ''.join(re.findall(r'[A-Za-z]', types))
#             # check whether the proxy works
#             proxies = {http: f'{http}://' + ip}
#             try:
#                 r = requests.get('https://www.ip.cn/', headers = headers, proxies = proxies, timeout = 3)
#                 html = r.text
#             except:
#                 print('fail-%s' % ip)
#             else:
#                 print('success-%s' % ip)
#                 soup = BeautifulSoup(html, 'lxml')
#                 div = soup.find(class_ = 'well')
#                 if div:
#                     print(div.text)
#                 # write the data
#                 w.writerow([ip, http])